# Setting the working directory to the "Replication" folder (the folder stores the data set used in the article)
setwd("C:/Users/ivan.petrusek/Desktop/Replication")

# Reading the merged Czech Attitude Barometer data set into R (wave 1 and wave 2)
library(haven)
cab <- read_sav("CAB_waves_1_and_2.sav", user_na = FALSE)

# Excluding respondents who contain at least one missing value (on the variables employed in our analyses)
cab_listwise <- cab[complete.cases(cab[, c("w1_vzdel_2", "devitka_excl", "years_since", "high_interest",
                                           "no_kids", "has_kids", "w1_left_right",
                                           "female", "university", "interakce")]), ]
nrow(cab_listwise)


##### Table 2. Attitude toward the length of basic education and personal experience (row %).
raw_table <- table(cab_listwise$devitka_excl, cab_listwise$w1_vzdel_2)
row_perc <- round(prop.table(raw_table, margin = 1) * 100, 2)
# Row marginal counts
row_ns <- rowSums(raw_table)
total_n <- sum(raw_table)

# Column marginal percentages
prop.table(table(cab_listwise$w1_vzdel_2)) * 100

# Chi-square test of independence
chisq_test <- chisq.test(raw_table)
print(chisq_test)

# Cramér's V
phi <- sqrt(chisq_test$statistic / sum(raw_table))
v_value <- phi / sqrt(min(dim(raw_table)) - 1)
print(v_value)


##### Figure 1. Attitude toward the length of basic education and year of birth.
### Preparing the data: calculate means and 3-year moving average
# Aggregating mean of attitudes by year of birth
agg_data <- aggregate(w1_vzdel_2 ~ year_birth, 
                      data = cab_listwise, 
                      FUN = mean, na.rm = TRUE)

# Calculating 3-year moving average using stats::filter
ma <- stats::filter(agg_data$w1_vzdel_2, rep(1/3, 3), sides = 2)

### Setting up the plotting area and othe figure parameters
par(mar = c(4, 4, 0.5, 1) + 0.1)
# Creating the empty plot first to define coordinates
plot(x = agg_data$year_birth, y = ma, 
     type = "n", 
     ylim = c(1, 5), xlim = c(1938, 2008),
     xaxt = "n", yaxt = "n", 
     xlab = "Year of Birth", 
     ylab = "Average Agreement with Ninth Grade Abolition", 
     bty = "n", main = "")

# Adding background horizontal grid
grid(nx = NA, ny = NULL, col = "lightgray", lty = "solid")

# Adding vertical era markers
era_lines <- c(1938.5, 1946.5, 1965.5, 1982.5)
abline(v = era_lines, col = "salmon", lwd = 2.5)

### Adding the main line and points
lines(agg_data$year_birth, ma, col = "gray25", lwd = 1.5)
points(agg_data$year_birth, ma, col = "gray25", pch = 19, cex = 1.1)

### Adding the axes
# Y-axis
y_at <- seq(1, 5, 0.5)
axis(side = 2, at = y_at, 
     labels = y_at, 
     las = 1, 
     tick = TRUE, 
     col = "lightgray")

# X-axis
axis(side = 1, at = 1938:2008, 
     labels = 1938:2008, 
     las = 2, 
     cex.axis = 0.75, 
     tick = TRUE)

### Adding era labels
text(x = 1942.5, y = 1.5, labels = "eight years", font = 2, col = "salmon")
text(x = 1956.0, y = 1.5, labels = "nine years", font = 2, col = "salmon")
text(x = 1974.0, y = 1.5, labels = "eight years", font = 2, col = "salmon")
text(x = 1995.0, y = 1.5, labels = "nine years", font = 2, col = "salmon")


##### Table A1. Balance tests: Comparison of covariates by length of basic school experience.
# Creating a list of all independent variables to test
vars_to_test <- c("years_since", "high_interest", "no_kids", "has_kids", 
                  "w1_left_right", "female", "university")

# Creating balance test results data frame
balance_results <- data.frame(Variable = character(),
                              Mean_8years = numeric(),
                              SD_8years = numeric(),
                              Mean_9years = numeric(),
                              SD_9years = numeric(),
                              Difference = numeric(),
                              SE = numeric(),
                              t_statistic = numeric(),
                              p_value = numeric(),
                              stringsAsFactors = FALSE)

### Conducting independent samples t-tests for each independent variable
# The two independent groups are defined by the treatment (i.e. 8 years vs. 9 years)
for (var in vars_to_test) {
  group_0 <- cab_listwise$devitka_excl == 0
  group_1 <- cab_listwise$devitka_excl == 1
  
  var_0 <- cab_listwise[[var]][group_0]
  var_1 <- cab_listwise[[var]][group_1]
  
  # Calculating descriptive statistics
  mean_0 <- mean(var_0, na.rm = TRUE)
  sd_0 <- sd(var_0, na.rm = TRUE)
  mean_1 <- mean(var_1, na.rm = TRUE)
  sd_1 <- sd(var_1, na.rm = TRUE)
  
  # Conducting the independent samples t-tests
  t_test <- t.test(var_1, var_0, paired = FALSE)
  
  # Storing the results
  balance_results <- rbind(balance_results, data.frame(
    Variable = var,
    Mean_8years = mean_0,
    SD_8years = sd_0,
    Mean_9years = mean_1,
    SD_9years = sd_1,
    Difference = mean_1 - mean_0,
    SE = (t_test$conf.int[2] - t_test$conf.int[1]) / (2 * 1.96),
    t_statistic = t_test$statistic,
    p_value = t_test$p.value))
}

# Rounding the numbers in the table
balance_results[, 2:8] <- round(balance_results[, 2:8], 3)
balance_results$p_value <- round(balance_results$p_value, 4)

# Saving Table A1 to MS Excel file
library(openxlsx)
write.xlsx(balance_results, 
           file = "Balance_Results.xlsx", 
           asTable = TRUE, 
           gridLines = FALSE)



########## OLS Linear Regression Models (Table A3. Robustness checks: Linear regression models of attitude toward the length of basic education.)
# Model 1 - only the key explanatory variable
model_1 <- lm(w1_vzdel_2 ~ devitka_excl, 
              data = cab_listwise)

# Model 2 - adding all control variables
model_2 <- lm(w1_vzdel_2 ~ devitka_excl + years_since + high_interest + 
                no_kids + has_kids + w1_left_right + female + university, 
              data = cab_listwise)

# Model 3 - all predictors + interaction term between personal experience and high political interest
model_3 <- lm(w1_vzdel_2 ~ devitka_excl + years_since + high_interest + 
                no_kids + has_kids + w1_left_right + female + university + 
                devitka_zajem, 
              data = cab_listwise)

# Model 4 - all predictors + interaction term between personal experience and the number of years since compelting basic school
model_4 <- lm(w1_vzdel_2 ~ devitka_excl + years_since + high_interest + 
                no_kids + has_kids + w1_left_right + female + university + 
                interakce, 
              data = cab_listwise)

# Exporting the model results (as the basis for Table A3.)
library(sjPlot)
tab_model(model_1, model_2, model_3, model_4, 
          digits = 3, digits.p = 4,
          show.se = TRUE, 
          show.std = TRUE,
          show.ci = FALSE, 
          show.r2 = TRUE,
          string.est = "Estimate", string.se = "SE", 
          p.threshold = c(0.05, 0.01, 0.001),
          p.style = "stars")



########## Ordered Logit Models (Table 3. Ordered logit models of attitude toward the length of basic education.)
library(MASS)

# Converting the dependent variable to aa ordered factor
cab_listwise$w1_vzdel_2_ord <- factor(cab_listwise$w1_vzdel_2, 
                                      levels = 1:5, 
                                      labels = c("Strongly agree", "Somewhat agree", "Neither agree nor disagree",
                                                 "Somewhat disagree", "Strongly disagree"),
                                      ordered = TRUE)

# Model 1 - only the key explanatory variable
model_1_ord <- polr(w1_vzdel_2_ord ~ devitka_excl, 
                data = cab_listwise, 
                Hess = TRUE)
# Proportional Odds test
install.packages("brant")
library(brant)
res_brant_1 <- brant(model_1_ord)

# Model 2 - adding all control variables
model_2_ord <- polr(w1_vzdel_2_ord ~ devitka_excl + years_since + high_interest + 
                  no_kids + has_kids + w1_left_right + female + university, 
                data = cab_listwise, 
                Hess = TRUE)
res_brant_2 <- brant(model_2_ord)

# Model 3 - all predictors + interaction term between personal experience and high political interest
model_3_ord <- polr(w1_vzdel_2_ord ~ devitka_excl + years_since + high_interest + 
                  no_kids + has_kids + w1_left_right + female + university + 
                  devitka_zajem, 
                data = cab_listwise, 
                Hess = TRUE)
res_brant_3 <- brant(model_3_ord)

# Model 4 - all predictors + interaction term between personal experience and the number of years since compelting basic school
model_4_ord <- polr(w1_vzdel_2_ord ~ devitka_excl + years_since + high_interest + 
                  no_kids + has_kids + w1_left_right + female + university + 
                  interakce, 
                data = cab_listwise, 
                Hess = TRUE)
res_brant_4 <- brant(model_4_ord)

# Exporting the model results (as the basis for Table 3.)
tab_model(model_1_ord, model_2_ord, model_3_ord, model_4_ord, 
          digits = 3, digits.p = 4,
          show.se = TRUE, show.ci = FALSE, 
          show.aic = TRUE,
          show.dev = TRUE,
          string.est = "Estimate", string.se = "SE", 
          p.threshold = c(0.05, 0.01, 0.001),
          p.style = "stars")


# Exporting the results of the Brant test for all four models
# Table A2. Proportional odds assumption tests for ordinal logit models (Table 3).
brant_names <- c(rownames(res_brant_3), "interakce")
res_list <- list(res_brant_1, res_brant_2, res_brant_3, res_brant_4)

# For each model, matching its rows to brant_names and extracting the values of three variables (X2, df, p)
merged_brant <- do.call(cbind, lapply(seq_along(res_list), function(i) {
  df <- res_list[[i]]
  df <- df[match(brant_names, rownames(df)), 1:3]
  colnames(df) <- paste0(c("X2", "df", "probability"), "_model", i)
  df
}))

rownames(merged_brant) <- brant_names
View(merged_brant)

# Saving the Table A2 to MS Excel file
# Table A2. Proportional odds assumption tests for ordinal logit models (Table 3).
write.xlsx(merged_brant, 
           file = "Brant_Tests_Results.xlsx", 
           asTable = TRUE, 
           gridLines = FALSE,
           rowNames = TRUE)


##### Plots for predicted probabilities
library(ggplot2)
library(MASS)

##### Figure 2. Predicted probabilities of attitude toward ninth grade abolition by length of basic school experience (Model 2).
### Creating a prediction data frame
# The key independent variable (i.e. the treatment) in the only variable that varies (0 and 1)
# Years since completing basic school is held at its mean.
# High interest, has kid(s), moderate, female with university degree"
pred_data <- data.frame(
  devitka_excl  = c(0, 1),
  years_since   = mean(cab_listwise$years_since),
  high_interest = 1,
  no_kids       = 0,
  has_kids      = 1,
  w1_left_right = 5,
  female        = 1,
  university    = 1)

# Getting predicted probabilities for each category of the dependent variable
pred_probs <- predict(model_2_ord, newdata = pred_data, type = "probs")

# Converting predicted probability to long format required for ggplot
pred_df <- data.frame(devitka_excl = factor(rep(c(0, 1), each = 5),
                                            labels = c("8 years at basic school", "9 years at basic school")),
                      category = factor(rep(1:5, times = 2)),
                      probability = c(pred_probs[1, ], pred_probs[2, ]))

### Bootstrap confidence intervals
library(boot)

boot_probs <- function(data, indices) {
  d <- data[indices, ]
  m <- polr(w1_vzdel_2_ord ~ devitka_excl + years_since + high_interest +
              no_kids + has_kids + w1_left_right + female + university,
            data = d, Hess = TRUE)
  as.vector(predict(m, newdata = pred_data, type = "probs"))
}

set.seed(123)
boot_out <- boot(data = cab_listwise, statistic = boot_probs, R = 1000)

# Extracting 95% percentile CIs
# Bootstrap indices are interleaved: 1,3,5,7,9 = 8 years; 2,4,6,8,10 = 9 years
ci_lower <- numeric(10)
ci_upper <- numeric(10)

for (i in 1:10) {
  ci <- boot.ci(boot_out, type = "perc", index = i)
  ci_lower[i] <- ci$percent[4]
  ci_upper[i] <- ci$percent[5]
}

# Reordering to match pred_df row order (rows 1-5 = 8 years, rows 6-10 = 9 years)
idx_8yrs <- c(1, 3, 5, 7, 9)   # odd indices = 8 years at basic school
idx_9yrs <- c(2, 4, 6, 8, 10)  # even indices = 9 years at basic school

pred_df$ci_lower <- c(ci_lower[idx_8yrs], ci_lower[idx_9yrs])
pred_df$ci_upper <- c(ci_upper[idx_8yrs], ci_upper[idx_9yrs])

# Verifying the alignment and checking the predicted probabilities
print(pred_df)
sum(pred_df[pred_df$devitka_excl == "8 years at basic school", "probability"])
sum(pred_df[pred_df$devitka_excl == "9 years at basic school", "probability"])

### Creating the barplot with predicted probabilities and bootstrap CIs
ggplot(pred_df, aes(x = category, y = probability, fill = devitka_excl)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.7), 
           width = 0.6) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
                position = position_dodge(width = 0.7),
                width = 0.2,
                linewidth = 0.7,
                color = "gray30") +
  scale_fill_manual(values = c("steelblue1", "salmon"),
                    name = "Basic school experience:") +
  scale_y_continuous(labels = scales::number_format(accuracy = 0.1),
                     breaks = seq(0, 0.5, 0.1),
                     limits = c(0, 0.55)) +
  scale_x_discrete(labels = c("1 Strongly agree", "2", "3", "4", "5 Strongly disagree")) +
  labs(title = "Probability of Agreement with Ninth Grade Abolition",
       subtitle = "Years since completing basic school at its mean (37 years), high interest, has kid(s), centrist (5 on left-right scale), female, university degree",
       x = "Level of Agreement with Ninth Grade Abolition",
       y = "Predicted probability (based on Model 2)") +
  theme_minimal(base_size = 12) +
  theme(legend.position  = "top",
        plot.title.position = "plot",
        panel.grid.major.x = element_blank())


##### Figure A1. Robustness checks: Predicted probabilities of attitude toward ninth grade abolition by length of basic school experience (Model 2).
# Create a prediction dataset:
# The key independent variable (i.e. the treatment) in the only variable that varies (0 and 1)
# Years since completing basic school held at 5 years.
# Low and moderate political interest, has kid(s), moderate, female with university degree
pred_data2 <- data.frame(
  devitka_excl  = c(0, 1),
  years_since   = 5,
  high_interest = 0,
  no_kids       = 0,
  has_kids      = 1,
  w1_left_right = 5,
  female        = 1,
  university    = 1)

# Getting predicted probabilities for each category of the dependent variable
pred_probs_2 <- predict(model_2_ord, newdata = pred_data2, type = "probs")

# Convertint predicted probability to long format required for ggplot
pred_df_2 <- data.frame(devitka_excl = factor(rep(c(0, 1), each = 5),
                                              labels = c("8 years at basic school", "9 years at basic school")), 
                        category = factor(rep(1:5, times = 2)), 
                        probability = c(pred_probs_2[1, ], pred_probs_2[2, ]))

### Bootstrap confidence intervals
boot_probs_2 <- function(data, indices) {
  d <- data[indices, ]
  m <- polr(w1_vzdel_2_ord ~ devitka_excl + years_since + high_interest +
              no_kids + has_kids + w1_left_right + female + university,
            data = d, Hess = TRUE)
  as.vector(predict(m, newdata = pred_data2, type = "probs"))
}

set.seed(234)
boot_out <- boot(data = cab_listwise, statistic = boot_probs_2, R = 1000)

# Extracting 95% percentile CIs
# Bootstrap indices are interleaved: 1,3,5,7,9 = 8 years; 2,4,6,8,10 = 9 years
ci_lower_2 <- numeric(10)
ci_upper_2 <- numeric(10)

for (i in 1:10) {
  ci <- boot.ci(boot_out, type = "perc", index = i)
  ci_lower_2[i] <- ci$percent[4]
  ci_upper_2[i] <- ci$percent[5]
}

# Reordering to match pred_df row order (rows 1-5 = 8 years, rows 6-10 = 9 years)
idx_8yrs <- c(1, 3, 5, 7, 9)   # odd indices = 8 years at basic school
idx_9yrs <- c(2, 4, 6, 8, 10)  # even indices = 9 years at basic school

pred_df_2$ci_lower_2 <- c(ci_lower_2[idx_8yrs], ci_lower_2[idx_9yrs])
pred_df_2$ci_upper_2 <- c(ci_upper_2[idx_8yrs], ci_upper_2[idx_9yrs])

# Verifying the alignment and checking the predicted probabilities
print(pred_df_2)
sum(pred_df_2[pred_df_2$devitka_excl == "8 years at basic school", "probability"])
sum(pred_df_2[pred_df_2$devitka_excl == "9 years at basic school", "probability"])

### Creating the barplot with predicted probabilities and bootstrap CIs
ggplot(pred_df_2, aes(x = category, y = probability, fill = devitka_excl)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.7), 
           width = 0.6) +
  geom_errorbar(aes(ymin = ci_lower_2, ymax = ci_upper_2),
                position = position_dodge(width = 0.7),
                width = 0.2,
                linewidth = 0.7,
                color = "gray30") +
  scale_fill_manual(values = c("steelblue1", "salmon"),
                    name = "Basic school experience:") +
  scale_y_continuous(labels = scales::number_format(accuracy = 0.1),
                     breaks = seq(0, 0.5, 0.1),
                     limits = c(0, 0.5)) +
  scale_x_discrete(labels = c("1 Strongly agree", "2", "3", "4", "5 Strongly disagree")) +
  labs(title = "Probability of Agreement with Ninth Grade Abolition",
       subtitle = "Years since completing basic school fixed at 5 years, low/moderate interest, has kid(s), centrist (5 on left-right scale), female, university degree",
       x = "Level of Agreement with Ninth Grade Abolition",
       y = "Predicted probability (based on Model 2)") +
  theme_minimal(base_size = 12) +
  theme(legend.position  = "top",
        plot.title.position = "plot",
        panel.grid.major.x = element_blank())
